# Global knitr chunk option: echo the source code of every chunk in the report.
knitr::opts_chunk$set(echo = TRUE)
library(mdsr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# tidy=FALSE, # display code as typed
# size="small") # slightly smaller font for code
# Fetch the unemployment workbook from GitHub and load one of its sheets.
# Remote location of the Excel file:
url <- "https://github.com/jmzobitz/DST490Datasets/raw/main/Unemployment.xlsx"
# Local file name the download is written to (relative to the working directory):
destfile <- "Unemployment.xlsx"
# Download the workbook to disk.
curl::curl_download(url = url, destfile = destfile)
# Read the 'UnemploymentMedianIncome' sheet; skip = 4 makes read_excel skip
# at least the first four rows before it reads the header and the data.
input_data <- readxl::read_excel(
  path = destfile,
  sheet = "UnemploymentMedianIncome",
  skip = 4
)
# Print a compact, column-by-column preview of what was imported.
glimpse(input_data)
## Rows: 3,277
## Columns: 100
## $ FIPS_Code <chr> "00000", "01000", "01001", "…
## $ State <chr> "US", "AL", "AL", "AL", "AL"…
## $ Area_Name <chr> "United States", "Alabama", …
## $ Rural_Urban_Continuum_Code_2013 <dbl> NA, NA, 2, 3, 6, 1, 1, 6, 6,…
## $ Urban_Influence_Code_2013 <dbl> NA, NA, 2, 2, 6, 1, 1, 6, 6,…
## $ Metro_2013 <dbl> NA, NA, 1, 1, 0, 1, 1, 0, 0,…
## $ Civilian_labor_force_2000 <dbl> 142601576, 2147173, 21861, 6…
## $ Employed_2000 <dbl> 136904853, 2047731, 20971, 6…
## $ Unemployed_2000 <dbl> 5696723, 99442, 890, 2609, 6…
## $ Unemployment_rate_2000 <dbl> 4.0, 4.6, 4.1, 3.7, 5.6, 5.4…
## $ Civilian_labor_force_2001 <dbl> 143786537, 2128027, 22081, 6…
## $ Employed_2001 <dbl> 136977996, 2017467, 21166, 6…
## $ Unemployed_2001 <dbl> 6808541, 110560, 915, 3024, …
## $ Unemployment_rate_2001 <dbl> 4.7, 5.2, 4.1, 4.3, 7.6, 6.8…
## $ Civilian_labor_force_2002 <dbl> 144839298, 2112621, 22161, 6…
## $ Employed_2002 <dbl> 136455783, 1987633, 21096, 6…
## $ Unemployed_2002 <dbl> 8383515, 124988, 1065, 3498,…
## $ Unemployment_rate_2002 <dbl> 5.8, 5.9, 4.8, 5.0, 7.7, 7.0…
## $ Civilian_labor_force_2003 <dbl> 145660094, 2128668, 22695, 7…
## $ Employed_2003 <dbl> 136944522, 2001568, 21557, 6…
## $ Unemployed_2003 <dbl> 8715572, 127100, 1138, 3588,…
## $ Unemployment_rate_2003 <dbl> 6.0, 6.0, 5.0, 4.9, 7.1, 5.9…
## $ Civilian_labor_force_2004 <dbl> 146724795, 2138306, 23241, 7…
## $ Employed_2004 <dbl> 138613904, 2018783, 22146, 7…
## $ Unemployed_2004 <dbl> 8110891, 119523, 1095, 3782,…
## $ Unemployment_rate_2004 <dbl> 5.5, 5.6, 4.7, 5.1, 7.1, 5.4…
## $ Civilian_labor_force_2005 <dbl> 148597241, 2140356, 23887, 7…
## $ Employed_2005 <dbl> 141000912, 2045234, 22986, 7…
## $ Unemployed_2005 <dbl> 7596329, 95122, 901, 3027, 6…
## $ Unemployment_rate_2005 <dbl> 5.1, 4.4, 3.8, 4.0, 5.7, 4.4…
## $ Civilian_labor_force_2006 <dbl> 150707773, 2170007, 24425, 7…
## $ Employed_2006 <dbl> 143729350, 2083207, 23619, 7…
## $ Unemployed_2006 <dbl> 6978423, 86800, 806, 2543, 6…
## $ Unemployment_rate_2006 <dbl> 4.6, 4.0, 3.3, 3.2, 5.6, 4.2…
## $ Civilian_labor_force_2007 <dbl> 152191050, 2180448, 24434, 8…
## $ Employed_2007 <dbl> 145156133, 2092030, 23610, 8…
## $ Unemployed_2007 <dbl> 7034917, 88418, 824, 2616, 6…
## $ Unemployment_rate_2007 <dbl> 4.6, 4.1, 3.4, 3.2, 6.4, 4.2…
## $ Civilian_labor_force_2008 <dbl> 153761037, 2176854, 24687, 8…
## $ Employed_2008 <dbl> 144860349, 2049579, 23376, 7…
## $ Unemployed_2008 <dbl> 8900688, 127275, 1311, 3983,…
## $ Unemployment_rate_2008 <dbl> 5.8, 5.8, 5.3, 4.8, 9.1, 6.0…
## $ Civilian_labor_force_2009 <dbl> 153825454, 2156593, 24660, 8…
## $ Employed_2009 <dbl> 139594699, 1938784, 22464, 7…
## $ Unemployed_2009 <dbl> 14230755, 217809, 2196, 7364…
## $ Unemployment_rate_2009 <dbl> 9.3, 10.1, 8.9, 8.9, 13.2, 1…
## $ Civilian_labor_force_2010 <dbl> 154254521, 2197028, 25749, 8…
## $ Employed_2010 <dbl> 139393814, 1968824, 23481, 7…
## $ Unemployed_2010 <dbl> 14860707, 228204, 2268, 8268…
## $ Unemployment_rate_2010 <dbl> 9.6, 10.4, 8.8, 9.9, 12.1, 1…
## $ Civilian_labor_force_2011 <dbl> 154520678, 2202337, 25845, 8…
## $ Employed_2011 <dbl> 140688861, 1991379, 23688, 7…
## $ Unemployed_2011 <dbl> 13831817, 210958, 2157, 7618…
## $ Unemployment_rate_2011 <dbl> 9.0, 9.6, 8.3, 9.0, 11.4, 10…
## $ Civilian_labor_force_2012 <dbl> 155038121, 2178508, 25762, 8…
## $ Employed_2012 <dbl> 142527201, 2000848, 23932, 7…
## $ Unemployed_2012 <dbl> 12510920, 177660, 1830, 6534…
## $ Unemployment_rate_2012 <dbl> 8.1, 8.2, 7.1, 7.7, 11.8, 8.…
## $ Civilian_labor_force_2013 <dbl> 155362278, 2172102, 25783, 8…
## $ Employed_2013 <dbl> 143905037, 2012828, 24155, 7…
## $ Unemployed_2013 <dbl> 11457241, 159274, 1628, 5740…
## $ Unemployment_rate_2013 <dbl> 7.4, 7.3, 6.3, 6.7, 10.4, 8.…
## $ Civilian_labor_force_2014 <dbl> 155936159, 2164715, 25639, 8…
## $ Employed_2014 <dbl> 146318952, 2018705, 24150, 8…
## $ Unemployed_2014 <dbl> 9617207, 146010, 1489, 5281,…
## $ Unemployment_rate_2014 <dbl> 6.2, 6.7, 5.8, 6.1, 10.5, 7.…
## $ Civilian_labor_force_2015 <dbl> 156840649, 2152295, 25541, 8…
## $ Employed_2015 <dbl> 148554918, 2020443, 24206, 8…
## $ Unemployed_2015 <dbl> 8285731, 131852, 1335, 4874,…
## $ Unemployment_rate_2015 <dbl> 5.3, 6.1, 5.2, 5.6, 8.9, 6.7…
## $ Civilian_labor_force_2016 <dbl> 158674951, 2155729, 25710, 8…
## $ Employed_2016 <dbl> 150949349, 2029157, 24395, 8…
## $ Unemployed_2016 <dbl> 7725602, 126572, 1315, 4806,…
## $ Unemployment_rate_2016 <dbl> 4.9, 5.9, 5.1, 5.4, 8.4, 6.5…
## $ Civilian_labor_force_2017 <dbl> 160744592, 2203458, 26269, 9…
## $ Employed_2017 <dbl> 153744181, 2103873, 25224, 8…
## $ Unemployed_2017 <dbl> 7000411, 99585, 1045, 3866, …
## $ Unemployment_rate_2017 <dbl> 4.4, 4.5, 4.0, 4.2, 6.0, 4.5…
## $ Civilian_labor_force_2018 <dbl> 162039448, 2240109, 26471, 9…
## $ Employed_2018 <dbl> 155727509, 2152270, 25515, 9…
## $ Unemployed_2018 <dbl> 6311939, 87839, 956, 3501, 4…
## $ Unemployment_rate_2018 <dbl> 3.9, 3.9, 3.6, 3.6, 5.1, 4.0…
## $ Civilian_labor_force_2019 <dbl> 163815888, 2272935, 26696, 9…
## $ Employed_2019 <dbl> 157805898, 2200437, 25927, 9…
## $ Unemployed_2019 <dbl> 6009990, 72498, 769, 2896, 3…
## $ Unemployment_rate_2019 <dbl> 3.7, 3.2, 2.9, 2.9, 4.0, 3.3…
## $ Civilian_labor_force_2020 <dbl> 161483724, 2269672, 26425, 9…
## $ Employed_2020 <dbl> 148453335, 2124409, 25023, 9…
## $ Unemployed_2020 <dbl> 13030389, 145263, 1402, 6086…
## $ Unemployment_rate_2020 <dbl> 8.1, 6.4, 5.3, 6.1, 7.7, 7.3…
## $ Civilian_labor_force_2021 <dbl> 162229903, 2259349, 26545, 9…
## $ Employed_2021 <dbl> 153544980, 2183330, 25809, 9…
## $ Unemployed_2021 <dbl> 8684923, 76019, 736, 2919, 4…
## $ Unemployment_rate_2021 <dbl> 5.4, 3.4, 2.8, 2.9, 5.5, 3.4…
## $ Civilian_labor_force_2022 <dbl> 164781642, 2286028, 26789, 1…
## $ Employed_2022 <dbl> 158766998, 2226670, 26181, 1…
## $ Unemployed_2022 <dbl> 6014644, 59358, 608, 2417, 3…
## $ Unemployment_rate_2022 <dbl> 3.7, 2.6, 2.3, 2.4, 4.1, 2.5…
## $ Median_Household_Income_2021 <dbl> 69717, 53990, 66444, 65658, …
## $ Med_HH_Income_Percent_of_State_Total_2021 <dbl> NA, 100.0, 123.1, 121.6, 71.…
# Reshape the wide county table into one row per county and year for four
# California counties (Los Angeles, Orange, San Diego, Riverside).
df1 <- input_data |>
  filter(State == "CA") |>
  # Order the California rows by 2021 civilian labor force, largest first.
  arrange(desc(Civilian_labor_force_2021)) |>
  filter(
    Area_Name %in% c(
      "Los Angeles County, CA",
      "Orange County, CA",
      "San Diego County, CA",
      "Riverside County, CA"
    )
  ) |>
  # Stack every column except the three identifiers into name/value pairs.
  pivot_longer(cols = !c(Area_Name, FIPS_Code, State)) |>
  mutate(
    # Year is the last four characters of the original column name; it is
    # extracted first, while `name` still carries the suffix. The result is a
    # character column, not a number.
    Year = str_extract(name, pattern = "....$"),
    # Then drop the trailing "_<four characters>" so that, for example,
    # every Unemployment_rate_<year> column shares the name Unemployment_rate.
    name = str_remove(name, pattern = "_....$")
  ) |>
  # Spread the measures back out: one column per measure, one row per
  # county and year.
  pivot_wider(names_from = name, values_from = value)
# Line-and-point chart of the yearly unemployment rate, one colour per county.
CAplot <- df1 |>
  ggplot(mapping = aes(x = Year, y = Unemployment_rate, color = Area_Name)) +
  geom_point() +
  # Year is a character column, so the x axis is discrete and the lines need
  # an explicit group to connect each county's points across years. The group
  # is mapped on this layer only: per the author's notes, placing it in
  # ggplot() made the area name show up twice when hovering over the points.
  geom_line(mapping = aes(group = Area_Name)) +
  labs(
    title = "California's 4 largest counties unemployment rate trends",
    y = "Unemployment Rate",
    color = "County"
  ) +
  # Label only every second year (2000, 2002, ..., 2022) to keep the axis readable.
  scale_x_discrete(breaks = seq(from = 2000, to = 2022, by = 2)) +
  theme_gray(base_family = "Times")
# Turn the static ggplot into an interactive plotly widget.
ggplotly(CAplot)
Be sure to include your original data graphic.
Summarize what you learned from the critiques. How did you incorporate those comments into your revision? What lessons did you learn?
The critiques suggested making the x-axis more readable and including more counties in my data graphic. One lesson I learned was that the critiques helped make my graph look better. I also learned that when I first tried to add geom_line it wasn't working, so I had to group the lines by the area name. I put that grouping in geom_line instead of ggplot; otherwise the area name would have shown up twice when you hover over the points.
Briefly summarize what you revised in your new data graphic.
I changed my graphic from a bar graph to a line-and-point graph. I also included more counties (the top 4) to show the difference between their unemployment rates. I also changed the x-axis to label every 2 years instead of all the years, because labeling every year made the years hard to read. Finally, I made the graphic interactive.